Let’s load in the MiamiR package.
library(MiamiR)
#>
#>
#> +-------------------------------------+
#> | MiamiR |
#> | |
#> | |
#> | * * |
#> | * * * |
#> | * * * * |
#> | * * * * * * * |
#> | * * * * * * * |
#> | * * * * * * * |
#> | * * * * * * * * * * * * * * * * * * |
#> | * * * * * * * * * * * * * * * * * * |
#> | * * * * * * * * * * * * * * * * * * |
#> +-------------------------------------+
#>
#>
#> Thank you for using MiamiR!
#> Citation not required, but greatly appreciated :)
#> Visit Github (https://github.com/CRON-Project/MiamiR) for requests, feedback, and info.
#> Visit www.theCRONproject.com for more data visualisation examples.
#> Here’s how to annotate the coordinates of your data with RSIDs for the index SNPs on each chromosome - these will be added to the data frame in a new column called Lab.
We will be using the Intelligence_Sum_Stats data frame which is part of the MiamiR package and utilising the Annotate_Data() function to do this - it will return a modified data frame called Labelled_Data
Labelled_Data <- Annotate_Data(Data = Intelligence_Sum_Stats,
Chromosome_Column = "CHROM",
Position_Column = "GENPOS", SNP_ID_Column = "ID",
PValue_Column = "P", Build = "HG19")Let’s inspect the data before plotting:
print(head(Labelled_Data, n = 10))
#> # A tibble: 10 × 15
#> SNP UNIQUE_ID CHR POS A1 A2 EAF_HRC Zscore stdBeta SE
#> <chr> <time> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 rs1417896… NA 2 3.99e7 a g 0.969 -0.727 -5.76e-3 0.00792
#> 2 rs11896686 NA 2 1.80e8 t g 0.899 -1.83 -8.34e-3 0.00455
#> 3 rs6428520 NA 1 8.98e7 t c 0.432 -0.119 -3.30e-4 0.00277
#> 4 rs4687424 NA 3 1.93e8 t c 0.894 0.712 3.16e-3 0.00443
#> 5 rs5277262… NA 4 1.08e7 t c 0.00302 -0.936 -2.73e-2 0.0292
#> 6 rs595827 NA 11 1.24e8 t c 0.309 3.50 1.04e-2 0.00296
#> 7 rs1436448… NA 2 1.56e8 a t 0.00202 0.501 1.79e-2 0.0356
#> 8 rs2319468 NA 5 2.46e7 t c 0.967 -0.692 -5.37e-3 0.00775
#> 9 rs354831 NA 9 8.91e7 t c 0.786 1.04 3.51e-3 0.00337
#> 10 rs1501628… NA 15 2.98e7 t c 0.998 -0.865 -3.05e-2 0.0353
#> # ℹ 5 more variables: P <dbl>, N_analyzed <dbl>, minINFO <dbl>,
#> # EffectDirection <chr>, Lab <chr>Here’s how to create a sole Manhattan plot of our labelled summary statistics using the Single_Plot() function - it will return a plotting object and a saved .jpg file.
Manhattan_Plot <- Single_Plot(Data = Labelled_Data,
Chromosome_Column = "CHROM",
Position_Column = "GENPOS", SNP_ID_Column = "ID",
PValue_Column = "P",
Title = "Intelligence",
Chromosome_Colours = c("blue", "turquoise"),
File_Name = "Manhattan_Plot")Here’s how to create a Miami plot of a pair of summary statistics using the Miami_Plot() function - it will return a plotting object and a saved .jpg file.
We will be using the Intelligence_Sum_Stats and Household_Income_Sum_Stats data frames which are part of the MiamiR package.
We have not annotated these data frames as they already have RS codes as their SNP IDs and the MiamiR package will automatically detect this and assign these to a newly created Lab column.
Miami_Plot <- Miami_Plot(Top_Data = Intelligence_Sum_Stats, Bottom_Data = Household_Income_Sum_Stats,
Top_Chromosome_Column = "CHROM",
Top_Position_Column = "GENPOS", Top_SNP_ID_Column = "ID",
Top_PValue_Column = "P",
Top_Colour_One = "green", Top_Colour_Two = "purple",
Bottom_Colour_One = "green", Bottom_Colour_Two = "purple",
Bottom_Chromosome_Column = "CHROM",
Bottom_Position_Column = "GENPOS", Bottom_SNP_ID_Column = "ID",
Bottom_PValue_Column = "P",
Top_Title = "Intelligence", Bottom_Title = "Household Income",
File_Name = "Miami_Plot", Width = 30, Height = 15, Quality = 900,
File_Type = "jpg")# Open a new page for grid drawing
grid::grid.newpage()
# Draw the Miami_Plot grid object
grid::grid.draw(Miami_Plot)The MiamiR package can also be used to inspect key SNPs in single or multiple sets of GWAS summary statistics by using the Forest_Plot() function.
Here’s how to create a Forest Plot centred around BETA values of key SNPs from our previously described pair of summary statistics using the Miami_Plot() function - it will return a plotting object and a saved .jpg file.
setwd("C:/Users/callumon/Miami_Package_R/MiamiR")
Forest_Plot_SNPs_BETA <- Forest_Plot(Data_Sets = c("Household_Income_Sum_Stats", "Intelligence_Sum_Stats"),
Names = c("Income", "IQ"),
Model_Reference = FALSE,
Line_Space = 1.5,
Border_Space_Right = 40,
Border_Space_Left = 20,
Test_Statistic = "BETA", #could be OR
Display_Test_Stat_Se_Column = TRUE,
Display_P_Value_Column = TRUE,
X_Axis_Separation = 0.01,
Pre_Calculated_CIs = FALSE,
X_Axis_Text_Resolution = 2,
Legend_Title = "Study",
Left_Title = "SNP",
P_Value_Title = "p-value",
Test_Stat_Se_Title = "BETA (SE)",
Match_Allele_Direction = TRUE,
Match_Allele_Study = "Household_Income_Sum_Stats",
Selected_SNPs = c("rs74832835", "rs1157671", "rs1790177",
"rs9508063", "rs225682", "rs56201315" ),
File_Name = "Forest_Plot_Intelligence_Income", Width =10, Height = 6, Quality = 900,
File_Type = "jpg"
)The MiamiR package also allows for the use of odds ratios (OR) as well, by modifying the Test_Statistic argument in the Forest_Plot() function.
setwd("C:/Users/callumon/Miami_Package_R/MiamiR")
Forest_Plot_SNPs_OR <- Forest_Plot(Data_Sets = c("LbDementia_Sum_Stats", "LbDementia_Sum_Stats"),
Names = c("Dementia (1)", "Dementia (2)"),
Model_Reference = FALSE,
Line_Space = 1.1,
Border_Space_Left = 7,
Border_Space_Right = 30,
X_Axis_Text_Resolution = 2,
Test_Statistic = "OR",
Display_P_Value_Column = TRUE,
Display_Test_Stat_CI_Column = TRUE,
X_Axis_Separation = 0.2,
Pre_Calculated_CIs = FALSE,
Legend_Title = "Study",
Left_Title = "SNP",
P_Value_Title = "p-value",
Test_Stat_Se_Title = "OR (CI)",
Match_Allele_Direction = TRUE,
Match_Allele_Study = "LbDementia_Sum_Stats",
Selected_SNPs = c("rs59867714","rs7913723","rs79007041",
"rs34624328", "rs492457",
"rs7974838" ),
File_Name = "Dementia", Width =10, Height = 6, Quality = 900,
File_Type = "jpg"
)The MiamiR package also allows for the same figures to be produced but for covariate effects from the raw outputs of lm() or glm() models in base R, when saved as objects and munged, using the Model_Munge() function before using the Forest_Plot() function.
Here’s how to process both glm and lm model objects. We will be using the Model and Model2 regression model objects which are part of the MiamiR package, which are linear and logistic regression models of Dementia diagnoses and BMI measures, respectively, based off of simulated data called Fake_Demo_Data, also available in the MiamiR package.
ModelSumGLM <- Model_Munge(Model_Object = "Model", Model_Type = "glm") #Dementia
ModelSumLM <- Model_Munge(Model_Object = "Model2", Model_Type = "lm") # BMILet’s inspect the data before plotting:
print(ModelSumGLM)
#> Covariate OR SE P group Reference
#> 2 Black 1.0043834 0.0192586204 0.82038675 Ethnicity Asian
#> 3 Hispanic 0.9959696 0.0191532242 0.83304346 Ethnicity Asian
#> 4 Other 0.9954373 0.0188039063 0.80790087 Ethnicity Asian
#> 5 White 0.9965701 0.0186876810 0.85416478 Ethnicity Asian
#> 6 Age 0.9998680 0.0002888248 0.64768179 Age None
#> 7 Male 0.9760366 0.0120966015 0.04522139 Sex Female
#> 8 BMI 0.9983169 0.0012295248 0.17098721 BMI None
#> 9 Suburban 0.9879690 0.0147833635 0.41312323 Location Rural
#> 10 Urban 1.0330746 0.0149870916 0.03015565 Location Rural
print(ModelSumLM)
#> Covariate BETA SE P group Reference
#> 2 Black 0.133180297 0.497341325 0.7889210 Ethnicity Asian
#> 3 Hispanic -0.236406456 0.494578604 0.6327588 Ethnicity Asian
#> 4 Other 0.491017257 0.485367263 0.3119571 Ethnicity Asian
#> 5 White -0.090114751 0.482601835 0.8519130 Ethnicity Asian
#> 6 Age 0.002009237 0.007459302 0.7877089 Age None
#> 7 Male -0.190246954 0.312965313 0.5434031 Sex Female
#> 8 Dementia -1.123403862 0.819983063 0.1709872 Dementia None
#> 9 Suburban 0.205108285 0.381848209 0.5912874 Location Rural
#> 10 Urban 0.613622574 0.387465677 0.1135854 Location RuralNow let’s plot the covariate effects from the munged linear model data frame, indicating that we are using a munged model object by setting the Model_Reference argument to TRUE in the Forest_Plot() function.
setwd("C:/Users/callumon/Miami_Package_R/MiamiR")
Forest_Plot_Model_LM <- Forest_Plot(Data_Sets = c("ModelSumLM", "ModelSumLM"),
Names = c("BMI LM (1)", "BMI LM (2)"),
Model_Reference = TRUE,
Test_Statistic = "BETA", #could be OR
Display_P_Value_Column = TRUE,
Display_Test_Stat_Se_Column = TRUE,
X_Axis_Separation = 0.8,
Border_Space_Left = 30,
Border_Space_Right = 75,
Strips = TRUE,
Pre_Calculated_CIs = FALSE,
Legend_Title = "Model",
Left_Title = "Covariate",
Test_Stat_Se_Title = "BETA (SE)",
File_Name = "Forest_Plot_LM", Width =10, Height = 6, Quality = 900,
File_Type = "jpg"
)The MiamiR package also formats model plots based off of OR test statistics as well. Let’s plot the glm, again via modifying the Test_Statistic argument in the Forest_Plot function.
setwd("C:/Users/callumon/Miami_Package_R/MiamiR")
Forest_Plot_Model_GLM <- Forest_Plot(Data_Sets = c("ModelSumGLM", "ModelSumGLM"),
Names = c("Dementia GLM (1)", "Dementia GLM (2)"),
Model_Reference = TRUE,
Test_Statistic = "OR",
X_Axis_Text_Resolution = 2,
Border_Space_Left = 2.8,
Border_Space_Right = 4.7,
Display_Test_Stat_CI_Column = TRUE,
Display_P_Value_Column = TRUE,
X_Axis_Separation = 0.02,
Pre_Calculated_CIs = FALSE,
Legend_Title = "Model",
Left_Title = "Covariate",
P_Value_Title = "p-value",
Test_Stat_Se_Title = "OR (CI)",
File_Name = "Forest_Plot_GLM", Width =10, Height = 6, Quality = 900,
File_Type = "jpg"
)